#!/usr/bin/env python
# -*- encoding: utf-8 -*-

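'''
Print a range of lines from a local GBK text file, or the chapter text of
a GBK-encoded web page. Written for Python 2 (uses urllib2).

Created on 2015-01-13

@author: cuckoocs
'''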
import codecs
from urllib2 import urlopen
import re

start = 50000
end = 55000

def read_local():
    with open('/Users/monstar-216/文档库/_book/smalltalk/11909.txt', 'rb') as f:
        i = 0
        while(i<end):
            i += 1
            if i > start:
                print f.readline().decode('gbk')
            else:
                f.readline()
        

url = 'http://www.ybdu.com/xiaoshuo/13/13180/3398161.html'
def read_network(url):
    request = urlopen(url)
    html_content = request.read()
    request.close()
    content = unicode(html_content, 'gbk').encode('utf-8')
    pattern = r'<div id="htmlContent" class="contentbox">[\s\S]*?<div'
    rst = re.findall(pattern, content, 0)
    temp = rst[0].replace('<div id="htmlContent" class="contentbox">', '')
    temp = temp.replace('<br />', '')
    temp = temp.replace('&nbsp;', '')
    print temp.replace('<div', '')
    

url2 = 'http://www.biquge.la/book/295/234075.html'
def read_network2(url):
    request = urlopen(url)
    html_content = request.read()
    request.close()
    content = unicode(html_content, 'gbk').encode('utf-8')
    pattern = r'<div id="content"[\s\S]*?</div>'
    rst = re.findall(pattern, content, 0)
    temp = rst[0].replace('<div id="content" style="font-size: 10pt;">', '')
    temp = temp.replace('<br />', '')
    temp = temp.replace('&nbsp;', '')
    print temp.replace('</div>', '')


if __name__ == '__main__':
    # When run as a script, fetch and print the sample chapter at url2.
    read_network2(url2)